package org.datadog.jmxfetch; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.InputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Enumeration; import java.util.HashMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import javax.security.auth.login.FailedLoginException; import org.apache.log4j.Appender; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.commons.lang3.CharEncoding; import org.datadog.jmxfetch.reporter.Reporter; import org.datadog.jmxfetch.util.CustomLogger; import com.beust.jcommander.JCommander; import com.beust.jcommander.ParameterException; @SuppressWarnings("unchecked") public class App { private final static Logger LOGGER = Logger.getLogger(App.class.getName()); private final static String SERVICE_DISCOVERY_PREFIX = "SD-"; public static final String CANNOT_CONNECT_TO_INSTANCE = "Cannot connect to instance "; private static final String SD_CONFIG_SEP = "#### SERVICE-DISCOVERY ####"; private static int loopCounter; private AtomicBoolean reinit = new AtomicBoolean(false); private ConcurrentHashMap<String, YamlParser> configs; private ConcurrentHashMap<String, YamlParser> sdConfigs = new ConcurrentHashMap<String, YamlParser>(); private ArrayList<Instance> instances = new ArrayList<Instance>(); private LinkedList<Instance> brokenInstances = new LinkedList<Instance>(); private AppConfig appConfig; public App(AppConfig appConfig) { this.appConfig = appConfig; this.configs = getConfigs(appConfig); } /** * Main entry of JMXFetch * <p/> * See AppConfig class for more details on the args */ public static void main(String[] args) { // Load the config from the args AppConfig config = new AppConfig(); JCommander jCommander = null; try { // Try to parse the args using JCommander jCommander = new JCommander(config, args); } catch (ParameterException e) { System.out.println(e.getMessage()); System.exit(1); } // Display the help and quit if (config.isHelp() || config.getAction().equals(AppConfig.ACTION_HELP)) { jCommander.usage(); System.exit(0); } // Set up the logger to add file handler CustomLogger.setup(Level.toLevel(config.getLogLevel()), config.getLogLocation()); // The specified action is unknown if (!AppConfig.ACTIONS.contains(config.getAction())) { LOGGER.fatal(config.getAction() + " is not in " + AppConfig.ACTIONS + ". Exiting."); System.exit(1); } // The "list_*" actions can only be used with the reporter if (!config.getAction().equals(AppConfig.ACTION_COLLECT) && !config.isConsoleReporter()) { LOGGER.fatal(config.getAction() + " argument can only be used with the console reporter. Exiting."); System.exit(1); } if(config.getAction().equals(AppConfig.ACTION_LIST_JVMS)) { List<com.sun.tools.attach.VirtualMachineDescriptor> descriptors = com.sun.tools.attach.VirtualMachine.list(); System.out.println("List of JVMs for user " + System.getProperty("user.name") ); for(com.sun.tools.attach.VirtualMachineDescriptor descriptor : descriptors) { System.out.println( "\tJVM id " + descriptor.id() + ": '" + descriptor.displayName() + "'" ); } System.exit(0); } // Set up the shutdown hook to properly close resources attachShutdownHook(); LOGGER.info("JMX Fetch has started"); App app = new App(config); // Initiate JMX Connections, get attributes that match the yaml configuration app.init(false); // We don't want to loop if the action is list_* as it's just used for display information about what will be collected if (config.getAction().equals(AppConfig.ACTION_COLLECT)) { // Start the main loop app.start(); } } /** * Attach a Shutdown Hook that will be called when SIGTERM is sent to JMXFetch */ private static void attachShutdownHook() { class ShutdownHook { public void attachShutDownHook() { Runtime.getRuntime().addShutdownHook(new Thread() { @Override public void run() { LOGGER.info("JMXFetch is closing"); // Properly close log handlers Enumeration<Appender> enume = (Enumeration<Appender>) LOGGER.getAllAppenders(); while (enume.hasMoreElements()) { Appender h = enume.nextElement(); h.close(); } } }); } } new ShutdownHook().attachShutDownHook(); } public void setReinit(boolean reinit) { this.reinit.set(reinit); } public static int getLoopCounter() { return loopCounter; } private static void clearInstances(List<Instance> instances) { Iterator<Instance> iterator = instances.iterator(); while (iterator.hasNext()) { Instance instance = iterator.next(); instance.cleanUp(); iterator.remove(); } } private String getSDName(String config){ String[] splitted = config.split(System.getProperty("line.separator"), 2); return SERVICE_DISCOVERY_PREFIX + splitted[0].substring(2, splitted[0].length()); } private FileInputStream newSdPipe() { FileInputStream sdPipe = null; try { sdPipe = new FileInputStream(appConfig.getServiceDiscoveryPipe()); //Should we use RandomAccessFile? LOGGER.info("Named pipe for Service Discovery opened"); } catch (FileNotFoundException e) { LOGGER.info("Unable to open named pipe for Service Discovery."); } return sdPipe; } public boolean processServiceDiscovery(byte[] buffer) { boolean reinit = false; String[] discovered; try { String configs = new String(buffer, CharEncoding.UTF_8); discovered = configs.split(App.SD_CONFIG_SEP + System.getProperty("line.separator")); } catch(UnsupportedEncodingException e) { LOGGER.debug("Unable to parse byte buffer to UTF-8 String."); return false; } for (String config : discovered) { if (config == null || config.isEmpty()) { continue; } try{ String name = getSDName(config); LOGGER.debug("Attempting to apply config. Name: " + name + "\nconfig: \n" + config); InputStream stream = new ByteArrayInputStream(config.getBytes(CharEncoding.UTF_8)); YamlParser yaml = new YamlParser(stream); if (this.addConfig(name, yaml)){ reinit = true; LOGGER.debug("Configuration added succesfully reinit in order"); } } catch(UnsupportedEncodingException e) { LOGGER.debug("Unable to parse byte buffer to UTF-8 String."); } } return reinit; } void start() { // Main Loop that will periodically collect metrics from the JMX Server long start_ms = System.currentTimeMillis(); long delta_s = 0; FileInputStream sdPipe = null; if(appConfig.getSDEnabled()) { LOGGER.info("Service Discovery enabled"); sdPipe = newSdPipe(); } while (true) { // Exit on exit file trigger... if (appConfig.getExitWatcher().shouldExit()){ LOGGER.info("Exit file detected: stopping JMXFetch."); System.exit(0); } // any SD configs waiting in pipe? if(sdPipe == null && appConfig.getSDEnabled()) { // If SD is enabled and the pipe is not open, retry opening pipe sdPipe = newSdPipe(); } try { int len; if(sdPipe != null && (len = sdPipe.available()) > 0) { byte[] buffer = new byte[len]; sdPipe.read(buffer); setReinit(processServiceDiscovery(buffer)); } } catch(IOException e) { LOGGER.warn("Unable to read from pipe - Service Discovery configuration may have been skipped."); } long start = System.currentTimeMillis(); if (this.reinit.get()) { init(true); } if (instances.size() > 0) { doIteration(); } else { LOGGER.warn("No instance could be initiated. Retrying initialization."); appConfig.getStatus().flush(); configs = getConfigs(appConfig); init(true); } long length = System.currentTimeMillis() - start; LOGGER.debug("Iteration ran in " + length + " ms"); // Sleep until next collection try { int loopPeriod = appConfig.getCheckPeriod(); LOGGER.debug("Sleeping for " + loopPeriod + " ms."); Thread.sleep(loopPeriod); } catch (InterruptedException e) { LOGGER.warn(e.getMessage(), e); } } } public void doIteration() { loopCounter++; Reporter reporter = appConfig.getReporter(); Iterator<Instance> it = instances.iterator(); while (it.hasNext()) { Instance instance = it.next(); LinkedList<HashMap<String, Object>> metrics; String instanceStatus = Status.STATUS_OK; String scStatus = Status.STATUS_OK; String instanceMessage = null; int numberOfMetrics = 0; try { if (!instance.timeToCollect()) { LOGGER.debug("it is not time to collect, skipping run for " + instance.getName()); continue; } metrics = instance.getMetrics(); numberOfMetrics = metrics.size(); if (numberOfMetrics == 0) { instanceMessage = "Instance " + instance + " didn't return any metrics"; LOGGER.warn(instanceMessage); instanceStatus = Status.STATUS_ERROR; scStatus = Status.STATUS_ERROR; brokenInstances.add(instance); } else if (instance.isLimitReached()) { instanceMessage = "Number of returned metrics is too high for instance: " + instance.getName() + ". Please read http://docs.datadoghq.com/integrations/java/ or get in touch with Datadog " + "Support for more details. Truncating to " + instance.getMaxNumberOfMetrics() + " metrics."; instanceStatus = Status.STATUS_WARNING; // We don't want to log the warning at every iteration so we use this custom logger. CustomLogger.laconic(LOGGER, Level.WARN, instanceMessage, 0); } if(numberOfMetrics > 0) reporter.sendMetrics(metrics, instance.getName()); } catch (IOException e) { instanceMessage = "Unable to refresh bean list for instance " + instance; LOGGER.warn(instanceMessage, e); instanceStatus = Status.STATUS_ERROR; scStatus = Status.STATUS_ERROR; brokenInstances.add(instance); } this.reportStatus(appConfig, reporter, instance, numberOfMetrics, instanceMessage, instanceStatus); this.sendServiceCheck(reporter, instance, instanceMessage, scStatus); } // Iterate over broken" instances to fix them by resetting them it = brokenInstances.iterator(); while (it.hasNext()) { Instance instance = it.next(); // Clearing rates aggregator so we won't compute wrong rates if we can reconnect reporter.clearRatesAggregator(instance.getName()); LOGGER.warn("Instance " + instance + " didn't return any metrics." + "Maybe the server got disconnected ? Trying to reconnect."); // Remove the broken instance from the good instance list so jmxfetch won't try to collect metrics from this broken instance during next collection instance.cleanUp(); instances.remove(instance); // Resetting the instance Instance newInstance = new Instance(instance, appConfig); try { // Try to reinit the connection and force to renew it LOGGER.info("Trying to reconnect to: " + newInstance); newInstance.init(true); // If we are here, the connection succeeded, the instance is fixed. It can be readded to the good instances list instances.add(newInstance); it.remove(); } catch(Exception e) { String warning = null; if(e instanceof IOException ) { warning = CANNOT_CONNECT_TO_INSTANCE + instance + ". Is a JMX Server running at this address?"; LOGGER.warn(warning); } else if (e instanceof SecurityException) { warning = CANNOT_CONNECT_TO_INSTANCE + instance + " because of bad credentials. Please check your credentials"; LOGGER.warn(warning); } else if (e instanceof FailedLoginException) { warning = CANNOT_CONNECT_TO_INSTANCE + instance + " because of bad credentials. Please check your credentials"; LOGGER.warn(warning); } else { warning = CANNOT_CONNECT_TO_INSTANCE + instance + " for an unknown reason." + e.getMessage(); LOGGER.fatal(warning, e); } this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR); this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR); } } try { appConfig.getStatus().flush(); } catch (Exception e) { LOGGER.error("Unable to flush stats.", e); } } public boolean addConfig(String name, YamlParser config) { // named groups not supported with Java6: "(?<check>.{1,30})_(?<version>\\d{0,30})" Pattern pattern = Pattern.compile(SERVICE_DISCOVERY_PREFIX+"(.{1,30})_(\\d{0,30})"); Matcher matcher = pattern.matcher(name); if (!matcher.find()) { // bad name. return false; } // Java 6 doesn't allow name matching - group 1 is "check" String check = matcher.group(1); if (this.configs.containsKey(check)) { // there was already a file config for the check. return false; } this.sdConfigs.put(name, config); this.setReinit(true); return true; } private ConcurrentHashMap<String, YamlParser> getConfigs(AppConfig config) { ConcurrentHashMap<String, YamlParser> configs = new ConcurrentHashMap<String, YamlParser>(); YamlParser fileConfig; List<String> fileList = config.getYamlFileList(); if (fileList == null) { return configs; } for (String fileName : fileList) { File f = new File(config.getConfdDirectory(), fileName); String name = f.getName().replace(".yaml", ""); FileInputStream yamlInputStream = null; String yamlPath = f.getAbsolutePath(); try { LOGGER.info("Reading " + yamlPath); yamlInputStream = new FileInputStream(yamlPath); fileConfig = new YamlParser(yamlInputStream); configs.put(name, fileConfig); } catch (FileNotFoundException e) { LOGGER.warn("Cannot find " + yamlPath); } catch (Exception e) { LOGGER.warn("Cannot parse yaml file " + yamlPath, e); } finally { if (yamlInputStream != null) { try { yamlInputStream.close(); } catch (IOException e) { // ignore } } } } LOGGER.info("Found " + configs.size() + " config files"); return configs; } private void reportStatus(AppConfig appConfig, Reporter reporter, Instance instance, int metricCount, String message, String status) { String checkName = instance.getCheckName(); appConfig.getStatus().addInstanceStats(checkName, instance.getName(), metricCount, reporter.getServiceCheckCount(checkName), message, status); } private void sendServiceCheck(Reporter reporter, Instance instance, String message, String status) { String checkName = instance.getCheckName(); reporter.sendServiceCheck(checkName, status, message, instance.getServiceCheckTags()); reporter.resetServiceCheckCount(checkName); } public void init(boolean forceNewConnection) { clearInstances(instances); clearInstances(brokenInstances); Reporter reporter = appConfig.getReporter(); Iterator<Entry<String, YamlParser>> it = configs.entrySet().iterator(); // SD config cache doesn't remove configs - it just overwrites. Iterator<Entry<String, YamlParser>> itSD = sdConfigs.entrySet().iterator(); while (it.hasNext() || itSD.hasNext()) { Map.Entry<String, YamlParser> entry; boolean sdIterator = false; if (it.hasNext()) { entry = it.next(); } else { entry = itSD.next(); sdIterator = true; } String name = entry.getKey(); YamlParser yamlConfig = entry.getValue(); if(!sdIterator) { it.remove(); } ArrayList<LinkedHashMap<String, Object>> configInstances = ((ArrayList<LinkedHashMap<String, Object>>) yamlConfig.getYamlInstances()); if (configInstances == null || configInstances.size() == 0) { String warning = "No instance found in :" + name; LOGGER.warn(warning); appConfig.getStatus().addInitFailedCheck(name, warning, Status.STATUS_ERROR); continue; } for (LinkedHashMap<String, Object> configInstance : configInstances) { Instance instance; //Create a new Instance object try { instance = new Instance(configInstance, (LinkedHashMap<String, Object>) yamlConfig.getInitConfig(), name, appConfig); } catch (Exception e) { String warning = "Unable to create instance. Please check your yaml file"; appConfig.getStatus().addInitFailedCheck(name, warning, Status.STATUS_ERROR); LOGGER.error(warning, e); continue; } try { // initiate the JMX Connection instance.init(forceNewConnection); instances.add(instance); } catch (IOException e) { instance.cleanUp(); brokenInstances.add(instance); String warning = CANNOT_CONNECT_TO_INSTANCE + instance + ". " + e.getMessage(); this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR); this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR); LOGGER.error(warning, e); } catch (Exception e) { instance.cleanUp(); brokenInstances.add(instance); String warning = "Unexpected exception while initiating instance " + instance + " : " + e.getMessage(); this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR); this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR); LOGGER.error(warning, e); } } } } }